***************************************************************************************************
*
* Program: make_PH_voting_file
* Purpose: Construct voting analysis file
*
***************************************************************************************************

* session setup: close any open log, start from empty memory, turn off output paging,
* and fix the seed used by the runiform() draws further down
capture log close
clear
set more off
set seed 1234

* directory globals (left blank here); each one is prepended directly to a file name
* below, so a non-empty value must carry its own trailing path separator
global demo         ""   // directory for storing demolition related files
global temp         ""   // directory for temporary files created for this project
global xwalk_assist ""   // directory for cross-walk file which contains IDs for the social assistance files
global match_chapin ""   // directory for Chapin files
global voting       ""   // directory for L2 voting records
global beat_crime   ""   // directory for crime rates by police beat

********************************************************************************
*** 1. Obtain name information
********************************************************************************

* start from the analysis panel
gzuse "${demo}analysis_file_final.dta.gz", clear   // created in "3_make_PH_panel_final"

* obtain chdhsid_200606: the ID file is unzipped for the merge and zipped again afterwards
local bdates "${temp}temp_file3_bdates_chdhsid_201212.dta"   // created in "3_make_PH_panel_final"
!gunzip "`bdates'.gz"
merge n:1 chdhsid_201212 using "`bdates'", keep(master match) keepusing(chdhsid_200606)
!gzip "`bdates'"
assert _merge==3
drop _merge

* merge with name info from Chapin (every row must find a match)
local chapin_vars fname mname lname namsuffx stdfname altlname brthdate brthdate1 brthdate2 brthdate3 brthdate4
merge n:1 chdhsid_200606 using "${match_chapin}cdb_memcase_200606_undup_master.dta", keep(master match) keepusing(`chapin_vars')
assert _merge==3
drop _merge

* summarize missing: one count per Chapin field, restricted to tag==1 rows
foreach v of local chapin_vars {
    display "`v'"
    count if missing(`v') & tag==1
}

* count persons with non-empty first name, last name and birthdate
count if tag==1 & fname!="" & lname!="" & birthdate!=""

* # of characters in fname / lname (helper variables are dropped again right away)
foreach nm in fname lname {
    gen length_`nm' = strlen(`nm')
}
foreach nm in fname lname {
    summarize length_`nm' if tag==1, detail
}
drop length_fname length_lname

gzsave "${demo}analysis_file_final_wnm.dta.gz", replace

***************************************************************************************************
*  Merge with voting records
***************************************************************************************************

*** Open PH demolition panel (has names attached)
gzuse "${demo}analysis_file_final_wnm.dta.gz", clear

* count persons with non-empty first name, last name and birthdate
count if tag==1 & fname!="" & lname!="" & birthdate!=""

* # of characters in fname / lname
foreach nm in fname lname {
    gen length_`nm' = strlen(`nm')
}
foreach nm in fname lname {
    summarize length_`nm' if tag==1, detail
}

* name merge keys: copy the raw names, strip leading blanks, remove apostrophes
gen first_name = fname
gen last_name = lname
foreach nm of varlist first_name last_name {
    replace `nm' = strltrim(`nm')
    replace `nm' = subinstr(`nm', "'", "", .)
}

* birthdate merge keys: characters 1-4 / 5-6 / 7-8 of birthdate, converted to numeric
gen birth_year  = substr(birthdate, 1, 4)
gen birth_month = substr(birthdate, 5, 2)
gen birth_day   = substr(birthdate, 7, 2)
destring birth_year birth_month birth_day, replace

* checks (these also fail on missing values, since missing compares as larger than any number)
assert birth_month<13
assert birth_day<32

*** Prepare IL voting file
* Builds the Illinois voter file with the merge keys used below (first_name, last_name,
* birth_year, birth_month, birth_day) and saves it to a tempfile; the panel in memory
* is preserved before and restored after.
preserve
gzuse "${voting}illinois_voterfile_v2.dta.gz", clear

* create variables for merge
* (year()/month()/day() are applied directly, so voters_birthdate is used as a Stata date here)
gen birth_year = year(voters_birthdate)
gen birth_month = month(voters_birthdate)
gen birth_day = day(voters_birthdate)
assert birth_month<13 if birth_month~=.
* NOTE(review): the day check is conditioned on birth_month, not birth_day, being non-missing
assert birth_day<32 if birth_month~=.
* NOTE(review): male is 1 exactly when female is missing -- this presumes female is coded
* 1/missing rather than 1/0 in this file; confirm against the voter-file layout
gen male = (female==.)
drop female

* harmonize (upper-case names)
replace first_name = upper(first_name)
replace last_name = upper(last_name)

* create dummies: each general_*/primary_* string flag becomes 1 if "Y" and 0 otherwise;
* the variable is re-created under the same name, so it moves to the end of the variable order
foreach var of varlist general_* primary_* {
 gen temp = (`var' == "Y")
 drop `var'
 rename temp `var'
 }
 
drop voters_birthdate

* every record in this file is tagged as an Illinois record
gen str state_vote = "IL"

* save temp file
tempfile temp_voter_IL
save "`temp_voter_IL'", replace
restore

*** Prepare voting Indiana
* Builds the Indiana voter file with the same merge keys and saves it to a tempfile;
* the panel in memory is preserved and restored around this step.
preserve
gzuse "${voting}indiana_voterfile.dta.gz", clear

* birthdate string -> numeric year / month / day (characters 1-4, 6-7 and 9-10)
gen birth_year  = substr(voters_birthdate, 1, 4)
gen birth_month = substr(voters_birthdate, 6, 2)
gen birth_day   = substr(voters_birthdate, 9, 2)
destring birth_year birth_month birth_day, replace

assert birth_month<13 if birth_month!=.
assert birth_day<32 if birth_month!=.

* harmonize (upper-case names)
foreach nm of varlist first_name last_name {
    replace `nm' = upper(`nm')
}

* every record in this file is tagged as an Indiana record and as registered
gen str state_vote = "IN"
gen registered_anything = 1

* save temp file
tempfile temp_voter_IN
save "`temp_voter_IN'", replace
restore

*** Prepare "Border state" voting file
* Builds the border-state voter file with the same merge keys, appends the Indiana file
* prepared above, and saves the result to a tempfile.
preserve
gzuse "${voting}borderstates_2019_voterfile.dta.gz", clear
 
* characters 4-5 of lalvoterid are used as the state code (compared against "IL" below)
gen state_vote = substr(lalvoterid,4,2)

* birthdate string is split on "/" into month / day / year pieces
split voters_birthdate, gen(split_) parse(/)

rename split_1 birth_month
rename split_2 birth_day
rename split_3 birth_year 

destring birth_month, replace
destring birth_day birth_year, replace

assert birth_month<13 if birth_month~=.
* NOTE(review): the day check is conditioned on birth_month, not birth_day, being non-missing
assert birth_day<32 if birth_month~=.

* harmonize
replace first_name = upper(first_name)
replace last_name = upper(last_name)

* Illinois records are excluded from this file (Illinois is handled by its own file)
drop if state_vote=="IL"

* create dummies: each general_*/primary_* string flag becomes 1 if "Y" and 0 otherwise
foreach var of varlist general_* primary_* {
 gen temp = (`var' == "Y")
 drop `var'
 rename temp `var'
 }

* append with Indiana voting data
* NOTE(review): the Indiana rows are appended after the "Y" -> 0/1 recode, so any
* general_*/primary_* flags in the Indiana file are not recoded here -- confirm that file's layout
append using `temp_voter_IN'

* save temp file
tempfile temp_voter_border_states
save "`temp_voter_border_states'", replace
restore

* create a cross-sectional file
* (keeps the person ID plus the name / birthdate merge keys and collapses exact duplicate rows)
keep chdhsid_201212 first_name last_name brthdate birth_year birth_month birth_day

duplicates drop

* duplicates in terms of merge list
* (-unique- is not an official Stata command; presumably the user-written package -- it must be installed)
unique first_name last_name birth_year birth_month birth_day
unique chdhsid_201212 first_name last_name birth_year birth_month birth_day
 // unique chdhsid, as expected

* identify duplicates by name variables
* N = number of persons sharing the same name / birthdate key; all such persons are dropped
sort first_name last_name birth_year birth_month birth_day
bysort first_name last_name birth_year birth_month birth_day: egen N = total(1==1)
drop if N>1
 // drop duplicates to ensure proper merge with voting file
drop N
 
* merge with IL voter file
* (unmatched persons are kept; unmatched voter records are discarded)
merge 1:n first_name last_name birth_year birth_month birth_day using `temp_voter_IL', keep(1 3)
gen IL_voter = (_merge==3)
drop _merge

* identify duplicates stemming from multiple matches in the voter file for a given name / date
* one matched voter record per key is kept at random; the draw depends on the seed set earlier
gen runiform = runiform()
sort first_name last_name birth_year birth_month birth_day runiform
by first_name last_name birth_year birth_month birth_day: gen n = _n
tab n if birth_year>1979
keep if n==1
drop n runiform

unique chdhsid_201212
 // file is unique at this level
 
tab IL_voter, m
tab general_2008, m
 
* merge with border states voter files
* with -update-, only values that are missing in memory are filled from the using file:
* _merge==4 marks rows where a missing value was filled, _merge==5 rows where non-missing values disagree
merge 1:n first_name last_name birth_year birth_month birth_day using `temp_voter_border_states', keep(1 3 4 5) update
gen border_voter = (_merge==3 | _merge==4)
gen border_dup = (_merge==5)
tab _merge state_vote 
drop _merge 
 
tab general_2008, m

* identify duplicates stemming from multiple matches in the voter file for a given name / date
* (same random one-per-key selection as after the IL merge)
gen runiform = runiform()
sort first_name last_name birth_year birth_month birth_day runiform
by first_name last_name birth_year birth_month birth_day: gen n = _n
tab n if birth_year>1979
keep if n==1
drop n runiform

*** create final variables for voting analysis

* harmonize variables: missing voting / registration flags become 0
foreach v of varlist voted_ever voted_primary registered_* general_* primary_* {
    replace `v' = 0 if missing(`v')
}

* for every even year 2000-2018: age in that year (year minus birth year), an age>=18
* eligibility flag, and election outcomes set to missing for persons under 18 in that year
* (so that they are not included in regressions below)
forvalues yr = 2000(2)2018 {
    gen age_`yr' = `yr' - birth_year
    gen eligible_`yr' = (age_`yr'>=18)
    replace general_`yr' = . if age_`yr'<18
    replace primary_`yr' = . if age_`yr'<18
}

* number of elections voted
egen pres_elect_voted = rowtotal(general_2000 general_2004 general_2008 general_2012 general_2016)
egen general_election_voted = rowtotal(general_2000 general_2002 general_2004 general_2006 general_2008 ///
    general_2010 general_2012 general_2014 general_2016 general_2018)

* number eligible (the eligible_* flags are 0/1 and never missing)
gen pres_elect_eligible = eligible_2000 + eligible_2004 + eligible_2008 + eligible_2012 + eligible_2016
gen general_elect_eligible = eligible_2000 + eligible_2002 + eligible_2004 + eligible_2006 + eligible_2008 ///
    + eligible_2010 + eligible_2012 + eligible_2014 + eligible_2016 + eligible_2018

* voted shares
gen pres_voted_share = pres_elect_voted / pres_elect_eligible
gen voted_share = general_election_voted / general_elect_eligible

* checks (these also fail if a share is missing, i.e. if someone has zero eligible elections)
assert pres_voted_share<=1
assert voted_share<=1

* voting by / after age 24 and age 22
*
* For A in {24, 22}:
*   year_ageA       = calendar year in which the person turns A (birth_year + A)
*   voted_by_ageA   = 1 if the person voted in any general election held in or before
*                     year_ageA, 0 otherwise; only defined when year_ageA is in 2000-2018
*   voted_after_ageA = 1 if the person voted in any general election held in or after
*                     year_ageA, 0 otherwise; only defined when year_ageA<=2018
*
* Changes relative to the earlier hand-unrolled version:
*   - voted_after used (year_ageA >= election year), which made every line after the first
*     redundant: any vote counted as "after" for everyone turning A in 2000 or later, no vote
*     ever counted for persons who turned A before 2000, and a missing year_ageA passed the
*     ">=" test. The comparison is now year_ageA <= election year.
*   - voted_by could be set to 1 for year_ageA==2019 although the variable is only defined
*     for 2000-2018; the replace is now restricted to the same range as the gen.
*     (The earlier version noted that this case applies to no one for age 24, since everyone
*     hits age 24 before 2018.)
* Variables are created in the same order as before (24 first, then 22).
foreach a of numlist 24 22 {

    gen year_age`a' = birth_year + `a'

    * voted in an election held no later than the year the person turns `a'
    gen voted_by_age`a' = 0 if year_age`a'>=2000 & year_age`a'<=2018
    forvalues e = 2000(2)2018 {
        replace voted_by_age`a' = 1 if general_`e'==1 & `e'<=year_age`a' & year_age`a'>=2000 & year_age`a'<=2018
    }

    * voted in an election held no earlier than the year the person turns `a'
    * (year_age`a'<=`e' is false when year_age`a' is missing, so those rows stay missing)
    gen voted_after_age`a' = 0 if year_age`a'<=2018
    forvalues e = 2000(2)2018 {
        replace voted_after_age`a' = 1 if general_`e'==1 & year_age`a'<=`e'
    }
}
 
* voted ever
* the incoming voted_ever is kept as voted_general; voted_ever is then rebuilt as the
* row maximum of the general and primary indicators
rename voted_ever voted_general

egen voted_ever = rowmax(voted_general voted_primary) 

rename black black_L2

* voting, IL voting records only
* Each outcome gets an _IL copy in which positive values are set to 0 when the matched
* record is not an Illinois record (state_vote~="IL", which includes unmatched persons).
* Changes relative to the earlier version, which used "& `var'==1":
*   - fractional shares (pres_voted_share, voted_share) were never zeroed unless exactly 1;
*     the condition is now "positive and non-missing", so missing outcomes stay missing
*   - the eligibility counts were zeroed only when exactly 1; they are now copied unchanged
*     NOTE(review): this presumes eligibility should not depend on the state of the matched
*     record -- confirm intended use of *_eligible_IL
foreach var of varlist voted_ever voted_general voted_primary_ever pres_voted_share voted_share pres_elect_eligible general_elect_eligible general_2018 general_2016 general_2014 general_2012 general_2010 general_2008 general_2006 general_2004 general_2000 voted_by_age24 voted_by_age22 voted_after_age24 voted_after_age22 registered_anything registered_nonpartisan registered_republican registered_democrat {
    gen `var'_IL = `var' 
    if !inlist("`var'", "pres_elect_eligible", "general_elect_eligible") {
        replace `var'_IL = 0 if state_vote~="IL" & `var'>0 & !missing(`var')
    }
}
    
tempfile temp_voter_analysis
save "`temp_voter_analysis'", replace
 
* open the analysis file (which lacks voting measures)
gzuse "${demo}analysis_file_final.dta.gz", clear   // created in "3_make_PH_panel_final"

* merge with matched voter records above; persons without a matched record stay in the file
merge n:1 chdhsid_201212 using "`temp_voter_analysis'", keep(master match) ///
    keepusing(voted_ever voted_general voted_primary_ever pres_voted_share voted_share ///
              pres_elect_eligible general_elect_eligible ///
              general_2018 general_2016 general_2014 general_2012 general_2010 ///
              general_2008 general_2006 general_2004 general_2000 ///
              voted_by_age24 voted_by_age22 voted_after_age24 voted_after_age22 ///
              registered_anything registered_nonpartisan registered_republican registered_democrat ///
              state_vote lalvoterid black_L2 *_IL)

gen voting_sample = (_merge==3)
drop _merge

*** clean file (this is based on Chyn 2018)

* take out two low rise buildings
drop if inlist(xxbdg, "WE002", "WE006")

**** Create/edit necessary variables
gen female = (male==0)
foreach g in female male {
    gen `g'_treat = `g'*treat
}

* Labor market activity corrections: blank quarters, employment and wages for years with
* non-missing wages above 50000 (wages_yr is blanked last because it drives the condition)
foreach v of varlist qtrs_yr employed_yr wages_yr {
    replace `v' = . if wages_yr>50000 & wages_yr!=.
}
gen cwages_yr = wages_yr if wages_yr>0

gen logcwage_yr = log(cwages_yr)

* employed with wages of at least 14000; 0 if not employed or employed below 14000;
* missing when employed_yr is neither 0 nor 1
gen employed_ft = cond(employed_yr==1, wages_yr>=14000, cond(employed_yr==0, 0, .))

* Create any_arrest by year
gen arrest_any_yr = violent_yr + property_yr + drugs_yr + other_yr

* Crime extensive margin
foreach stem in arrest_any violent property drugs other {
    gen d`stem'_yr = (`stem'_yr>0)
}

* Crime before age 18 and after demolition: person-level total over rows with
* age_yr<18 and year>treat_yr
foreach stem in arrest_any violent property drugs other {
    tempvar pre18
    gen `pre18' = `stem'_yr if age_yr<18 & year>treat_yr
    bysort chdhsid_201212: egen `stem'_pd_b18 = total(`pre18')
    drop `pre18'
}

* Crime correction
replace xxdrugs = . if xxdrugs>9   // 99th percentile censoring

*** Edits to baseline measures

* fix wage data (note how this is defined: measured for the year prior to demolition OR 1995 Q1 for the 1995 demo sites)
* the non-missing guard keeps rows with a missing xxwage from having xxemployed blanked
* (missing compares as larger than 8000), in line with the wages_yr corrections above
replace xxemployed = . if xxwage>8000 & xxwage~=.
replace xxwage = . if xxwage>8000 & xxwage~=. // outliers in terms of quarterly earnings
replace xxwage = 4*xxwage 
 // convert to annual to match with everything else in annual terms
 // assume that over 8,000 of earnings in one quarter is incorrect
 // NOTE(review): this comment previously said 6,000 while the code uses 8000 -- confirm the cutoff

* blank arrest components when total baseline arrests are implausibly high
* (more than 7 overall, or more than 5 for those aged 18 or younger at baseline);
* the total itself is blanked afterwards because it drives the conditions
foreach var of varlist xxviolent xxproperty xxdrugs xxother {
 replace `var' = . if xxany_arrest>7
 replace `var' = . if xxany_arrest>5 & xxage<=18
 }

replace xxany_arrest=. if xxany_arrest>7
replace xxany_arrest=. if xxany_arrest>5 & xxage<=18

foreach var of varlist xxany_arrest xxviolent xxproperty xxdrugs xxother {
 replace `var' = . if xxage<13 // replace if too young at baseline
 }

* moved out of public housing: complement of ph_3pd, missing when ph_3pd is missing
gen moved_from_ph_3pd = .
 replace moved_from_ph_3pd = 1 if ph_3pd==0
 replace moved_from_ph_3pd = 0 if ph_3pd==1
 
* neighborhood measures in the row for year treat_yr+3 (missing in all other rows);
* blanked for treated persons with distance==0
foreach var in pblack pbpov ppubass punemployed nbh_violent nbh_property {
 gen `var'_3_yr = `var' if year==treat_yr+3
 replace `var'_3_yr = . if year==treat_yr+3 & distance==0 & treat==1
} 

* fix high-rise public housing variable
replace ph_high_rise=0 if ph==0
replace ph_med_rise=0 if ph==0

replace ph_high_rise_3pd=0 if ph_3pd==0
replace ph_med_rise_3pd=0 if ph_3pd==0

*** 2016/12/13

* Get CHDHSID_200606
* Builds a crosswalk with exactly one row per chdhsid_201212 and saves it to a tempfile.
preserve
use "${xwalk_assist}crosswalk_2006_12.dta", clear
keep chdhsid_201212 chdhsid_200606
* when a 2012 ID has several crosswalk rows, keep one of them at random
* (need this file to be unique on chdhsid_201212)
* note: this re-seeds the random-number generator in the middle of the file
set seed 1234
sort chdhsid_201212
by chdhsid_201212: gen temp = runiform()
sort chdhsid_201212 temp
by chdhsid_201212: gen temp2 = 1 if _n==1
keep if temp2==1
drop temp*
duplicates report chdhsid_201212
tempfile temp_ids
save `temp_ids', replace
restore

* Merge on 200606 IDs (every row must find a match)
merge n:1 chdhsid_201212 using `temp_ids', keep(1 3)
assert _merge==3
drop _merge

* Duplicates: the panel must be unique on person x year
bysort chdhsid_201212  year: egen total=total(1==1)
assert total==1
drop total

* merge demographics + chdhsid_200606
* chdhsid_200606 is already in memory from the merge above; this merge has no update/replace
* option, so the values already in memory are kept
!gunzip "${temp}temp_file3_bdates_chdhsid_201212.dta.gz"
merge n:1 chdhsid_201212 using "${temp}temp_file3_bdates_chdhsid_201212.dta", keep(1 3) keepusing(black hispanic chdhsid_200606)
 // created in "3_make_PH_panel_final"
!gzip "${temp}temp_file3_bdates_chdhsid_201212.dta"
assert _merge==3
drop _merge

* the sentencing file is keyed on studyid, which is the 2006 ID
gen studyid=chdhsid_200606

* merge with sentencing data
merge n:1 studyid using "${temp}temp_sentenced.dta", keep(1 3) keepusing(sentenced sentenced_nonfine sentenced_imprison sentenced_imprison_b2008 imprison_* sentence_length)
 // created by "4_make_PH_imprisonment_final.do"
* persons without a sentencing record (_merge==1) get zeros
replace sentenced=0 if _merge==1
replace sentenced_nonfine=0 if _merge==1
replace sentenced_imprison=0 if _merge==1
replace sentence_length = 0 if _merge==1
replace sentenced_imprison_b2008 = 0 if _merge==1

* checks
assert sentence_length>0 if sentenced_imprison==1
assert sentenced_imprison_b2008<=sentenced_imprison

* outliers: top-code positive sentence lengths at their 99th percentile
* the non-missing guard matters because a missing sentence_length compares as larger than
* r(p99) and would otherwise be overwritten with the 99th percentile
sum sentence_length if sentence_length>0, det
replace sentence_length = `r(p99)' if sentence_length>`r(p99)' & !missing(sentence_length)

* age and incarcerated status in each year
* age is baseline age plus years elapsed since the demolition year; persons without a
* sentencing record get 0 once 18 or older, and matched persons are blanked while under 18
forvalues i=2000(1)2011 {
 gen age_`i' = xxage + (`i'-treat_yr)
 replace imprison_`i' = 0 if age_`i'>=18 & _merge==1
 replace imprison_`i' = . if age_`i'<18 & _merge==3 
 }
 
replace sentenced_imprison_b2008 = . if age_2008<18 

* age at different points in time (up to 2011)
forvalues i=2000(2)2011 {
 gen eligible_`i' = (age_`i'>=18)
 }

egen pres_election_imprison = rowtotal(imprison_2000 imprison_2004 imprison_2008) 
gen pres_elect_eligible_imprison = eligible_2000 + eligible_2004 + eligible_2008 
 // these measures can only be defined over the time span for sentencing in these years
gen pres_election_imprison_share = pres_election_imprison/pres_elect_eligible_imprison

* checks
assert pres_election_imprison_share~=. if pres_voted_share~=.
assert pres_election_imprison_share<=1 if pres_voted_share~=.
assert pres_election_imprison_share>=0

* age and incarcerated status in each year, conditional on being less than 24
gen imprison_by_age24 = 0 if age_2011>=24
 // missing if the child is not 24 years old by 2011 (the last complete year of sentencing data)

forvalues i=2000(1)2011 {
 replace imprison_by_age24 = 1 if imprison_`i' == 1 & (age_`i'>=18 & age_`i'<=24 & age_2011>=24)
  // replace if imprisoned in this year and in the relevant age range
 }
 
* checks
assert imprison_by_age24 == . if xxage==5
 // they never reach age 24 by 2011
 
* the year-specific helpers are only needed for the measures above
drop age_20* eligible_20*
drop pres_election_imprison
drop pres_elect_eligible_imprison

drop _merge

* additional variables for heterogeneity analysis: baseline age groups and their treatment interactions
gen xxageless12 = (xxage>=5 & xxage<12)
gen xxage1218 = inrange(xxage, 12, 18)

foreach grp in xxageless12 xxage1218 {
	gen `grp'_treat = `grp'*treat
}

* Create subgroups based on household outcomes
preserve
	* flatten to adults in the household: one row per person (tag==1) with baseline age above 18
	keep if xxage>18 & tag==1

	bysort household_id: gen hh_size = _N
	assert hh_size>=1

	* household-level baseline measures
	bysort household_id: egen hh_xxage = mean(xxage)
	bysort household_id: egen hh_xxearnings = total(xxwages)
	bysort household_id: egen hh_xxany_arrest = total(xxany_arrest)

	* reduce to one row per household
	keep household_id hh_xxearnings hh_xxany_arrest
	duplicates report household_id
	duplicates drop

	tempfile hh_chars
	save `hh_chars', replace
restore

* merge persons with HH characteristics
merge n:1 household_id using `hh_chars', keep(master match) keepusing(hh_xxearnings hh_xxany_arrest)

* household work / crime indicators; households without any adult row (_merge==1)
* are coded as no work and no crime
gen hh_xxwork    = cond(_merge==3, hh_xxearnings>0, 0)
gen hh_xxnowork  = cond(_merge==3, hh_xxearnings==0, 1)
gen hh_xxcrime   = cond(_merge==3, hh_xxany_arrest>0, 0)
gen hh_xxnocrime = cond(_merge==3, hh_xxany_arrest==0, 1)

drop _merge

* treatment interactions
foreach grp in crime nocrime work nowork {
	gen hh_xx`grp'_treat = hh_xx`grp'*treat
}

* obtain more address data
* diagnostics before the pre-demolition rows are filled in
tab year, sum(pbpov)
tab year, sum(ph)
assert pbpov==. if year==1990
 // always missing earlier pre-demolition years

sum pbpov if year>treat_yr
sum nbh_violent if year>treat_yr
sum ph if year>treat_yr

assert address=="" if year==1990
 // always missing

*** merge in the missing pre-demolition info (prior file has no neighborhood info beyond t=-2)

* census info
* with -update-, only values that are missing in memory are filled from the using file
!gunzip "${temp}temp_addr_census_panel.dta.gz" 
merge 1:1 chdhsid_201212 year using "${temp}temp_addr_census_panel.dta", update
 // file created by "3_make_PH_panel_final"
!gzip "${temp}temp_addr_census_panel.dta"

* person-years that exist only in the using file are not added
drop if _merge==2

tab year _merge, m
*drop _merge
* the merge result is kept under another name because it is used again after the next merge
rename _merge temp_merge

assert has_addr_panel==0 if year<=treat_yr-3
 // this got set to 0

replace has_addr_panel = 1 if address~="" & year<=treat_yr-3
replace has_addr_panel = 0 if address=="" & year<=treat_yr-3
 // this should not initiate any changes

* get one more neighborhood characteristic
* Merge on neighborhood characteristics;
* NOTE(review): -gen county- / -gen tract- require that no variable with exactly that name
* exists yet, so the county / tract inside the expressions can only resolve through
* variable-name abbreviation (e.g. to county2000 / tract2000, which are used further down).
* Confirm which variables are meant; this breaks under -set varabbrev off-.
gen county=string(county,"%03.0f")
 replace county="" if county=="."
 replace county="" if state!="IL" | (state=="IL" & county!="031") 
gen tract=tract
 replace tract="" if county==""

* tract codes must be empty or exactly 6 characters
gen temp=length(tract)
assert temp==0 | temp==6
drop temp

* Merge 1990 Census
merge n:1 county tract using "${demo}census_1990_cook_county.dta", keep(1 3) keepusing(pemployed)
tab year _merge
* temp_merge==4 marks rows whose missing values were filled by the update merge above
* NOTE(review): punemployed is overwritten with pemployed as-is -- confirm that pemployed in
* the 1990 file is on the same scale / definition as punemployed
replace punemployed = pemployed if temp_merge==4
drop _merge pemployed temp_merge county tract

sum pbpov if year>treat_yr
 // this should match the previous line
 
* ensure that this retains the address info for pre-demolition years (previously this hadn't been retained)
count if address~="" & year==1990

* poverty and race shares must be missing together
assert pbpov==. if pblack==.
assert pblack==. if pbpov==.

* get PH info
* the existing bdg is parked under the name temp so that the crosswalk's bdg can be merged in
rename bdg temp
 
!gunzip "${demo}xwalk_case_addr_PH_buildings.dta.gz"
merge n:1 address state city zip using "${demo}xwalk_case_addr_PH_buildings.dta", keep(1 3) keepusing(bdg) 
!gzip "${demo}xwalk_case_addr_PH_buildings.dta"
 // this file is created by "2_make_addr_PH_buildings_xwalk_final.do"

drop _merge

* only empty building codes in rows at least 3 years before demolition are filled
replace temp = bdg if year<=treat_yr-3 & temp==""
 // fills in building for the pre-period observations

drop bdg
rename temp bdg

* public housing indicator for the same pre-period rows: 1 with a building code,
* 0 with an address but no building code (left untouched when the address is empty)
replace ph = 1 if bdg~="" & year<=treat_yr-3
replace ph = 0 if bdg=="" & address~="" & year<=treat_yr-3

* first two characters of the building codes (presumably the project identifier -- confirm)
gen proj = substr(bdg,1,2)
gen xxproj = substr(xxbdg,1,2)

sum ph if year>treat_yr

// note that crime data by year is not available for pre-demolition years (before 1995)

* use 1995 beat-crime rates for pre-demo addresses

assert nbh_violent==. if year<1995
assert nbh_property==. if year<1995

assert nbh_violent==. if year<treat_yr & treat_yr==1995
assert nbh_property==. if year<treat_yr & treat_yr==1995

* pre-demo crime data is only non-missing for 1996 and later demolitions
sum nbh_violent if year==treat_yr-1 & treat_yr==1996
sum nbh_property if year==treat_yr-1 & treat_yr==1996
 // these are available

sum nbh_violent if year>treat_yr

* beat info
* tract2000 is parked under the name temp; a merge key named tract is built from it for
* rows with county2000==31 only and is dropped again after the merge
rename tract2000 temp
gen tract = temp if county2000==31
gen length = length(tract)
assert length==6 | length==0 // ==0 if missing tract info
drop length

merge n:1 tract using "${demo}xwalk_beat_tract.dta", keep(1 3) keepusing(beat_num)
count if tract=="" & _merge==1
count if tract~="" & _merge==1
 // a small number of tracts don't merge to a beat
	
drop tract _merge
rename temp tract2000

* year is temporarily replaced by the constant 1995 so that every row picks up the
* 1995 beat crime rates; the true year is restored after the merge
rename year temp
gen year = 1995

* crime data (will be specific to 1995)
!gunzip "${beat_crime}beat_crime_panel.dta.gz"
merge n:1 beat_num year using "${beat_crime}beat_crime_panel.dta", keep(1 3) keepusing(violent_rate property_rate)
!gzip "${beat_crime}beat_crime_panel.dta"

* every row with a beat must find 1995 crime rates
assert _merge==3 if beat_num~=.
drop _merge

drop year
rename temp year

count if violent_rate~=. & nbh_violent==. & year==treat_yr-1

* replace using the newly merged data
* (the -sum- between the two replaces is a diagnostic; it runs after nbh_violent has been filled)
replace nbh_violent=violent_rate if nbh_violent==. & year<=1995
sum violent_rate if nbh_violent==. & year<=1995
replace nbh_property=property_rate if nbh_property==. & year<=1995

tab year if nbh_violent==. & violent_rate~=.
tab year treat_yr if nbh_violent==. & violent_rate~=.
 // these all occur in the t=-2 and t=-1 periods
 // this is occurring b/c crime data was not available before 1995
 
* manual correction (assign remaining missing obs with 1995 crime rates)
replace nbh_violent=violent_rate if nbh_violent==. & year<=treat_yr & beat_num~=.
replace nbh_property=property_rate if nbh_property==. & year<=treat_yr & beat_num~=.

tab year if nbh_violent==. & violent_rate~=.

* the merged 1995 rates are only needed for the fills above
drop violent_rate property_rate

tab year treat, sum(nbh_violent)

* neighborhood voting rate
* the merge key census_tract is a numeric copy of tract2000
gen census_tract = tract2000
destring census_tract, replace

merge n:1 census_tract using "${voting}voting_by_tract.dta", keep(1 3) keepusing(nbh_vote_rate)
drop _merge

** generate duration-weighted measure of neighborhood poverty
* person-level mean of pbpov over the rows with treat_yr < year < treat_yr+8
cap drop temp
gen temp_yr = treat_yr+8
gen temp = pbpov if year>treat_yr & year<temp_yr
replace temp = . if distance==0 & treat==1 
 // these are times when the social assist records are not updated properly
bysort chdhsid_201212: egen pbpov_wtg = mean(temp)
drop temp

lab var pbpov_wtg "% of Neighborhood (Tract) Residents Below Poverty Line, Weighted"

* drop these measures (they are re-created just below)
drop pbpov_3pd
drop pbpov_6pd
drop nbh_violent_3pd
drop nbh_violent_6pd

* create measures of neighborhood characteristics at different points in time
* (non-missing only in the row for year treat_yr+3 / treat_yr+6)
foreach var of varlist pbpov nbh_violent nbh_vote_rate {
	gen `var'_3pd = `var' if year==treat_yr+3
	gen `var'_6pd = `var' if year==treat_yr+6
	}

* the 6-year poverty label previously read "% Below Poverty (Tract) Residents Below Poverty
* Line"; it now uses the same wording as the 3-year label
lab var pbpov_3pd "% of Neighborhood (Tract) Residents Below Poverty Line, 3 Years After"
lab var pbpov_6pd "% of Neighborhood (Tract) Residents Below Poverty Line, 6 Years After"
lab var nbh_violent_3pd "Violent Crime Rate (Beat), 3 Years After"
lab var nbh_violent_6pd "Violent Crime Rate (Beat), 6 Years After"

* pre and post treatment indicators: post`k' marks the row k years after the demolition
* year (k = 0..10), treat_post`k' is its interaction with treat
forvalues k = 0/10 {
	gen post`k' = (year==treat_yr+`k')
	gen treat_post`k' = treat*post`k'
}

* clear any leftover temp* helper variables
capture drop temp*

* 10/29/2017 -- add pre-terms: pre`k' marks the row k years before the demolition year (k = 1..5)
forvalues k = 1/5 {
	gen pre`k' = (year==treat_yr-`k')
	gen treat_pre`k' = treat*pre`k'
}

* time relative to demolition
gen t = year - treat_yr

* checks: persons (tag==1 rows) overall and in the voting sample
count if tag==1
count if tag==1 & voting_sample==1

compress

* save voting analysis file
gzsave "${demo}analysis_file_voting_final.dta.gz", replace

capture log close


